NOTE: from level 4/5 onwards I will execute most steps locally, as the datasets are not as large anymore.

1 Introduction

Here, we will work on the level 5 of the CD4 T cell, which entails:

  • Removal of 3 clusters: activated CD4 T cells, metabolic/poor-quality cells, and mitochondrial+ T cells.
  • Subcluster Follicular Th CXCL13+CBLB+ into 2 clusters
  • Recalculate the UMAP.
  • Subset the Th17, IL2RA+FOXP3+ Treg and Memory T to further stratify them (3P only).

1.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)

1.2 Parameters

# Paths
path_to_obj <- here::here("scRNA-seq/results/R_objects/level_4/CD4_T/CD4_T_integrated_level_4.rds")
# path_to_obj <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_4/CD4_T/CD4_T_integrated_level_4.rds"
path_to_save <- here::here("scRNA-seq/results/R_objects/level_5/CD4_T/paper/CD4_T_subseted_annotated_level_5.rds")
# path_to_save <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_5/CD4_T/paper/CD4_T_subseted_annotated_level_5.rds"
# path_to_save_th <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_5/CD4_T/paper/CD4_T_Th_level_5_annotated.rds"
path_to_save_th <- here::here("scRNA-seq/results/R_objects/level_5/CD4_T/paper/CD4_T_Th_level_5_annotated.rds")


# Colors
color_palette <-  c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", "#5A5156", "#AA0DFE",   
                    "#F8A19F", "#F7E1A0", "#1C8356", "#FEAF16", "#822E1C", "#C4451C",   
                    "#1CBE4F", "#325A9B", "#F6222E", "#FE00FA", "#FBE426", "#16FF32", 
                    "black",   "#3283FE", "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",   
                    "#90AD1C", "#FE00FA", "#85660D", "#3B00FB", "#822E1C", "coral2", 
                    "#1CFFCE", "#1CBE4F", "#3283FE", "#FBE426", "#F7E1A0", "#325A9B",   
                    "#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD", "#FEAF16", "#683B79",   
                    "#B10DA1", "#1C7F93", "#F8A19F", "dark orange", "#FEAF16", "#FBE426",  
                    "Brown")

# Functions
source(here::here("scRNA-seq/bin/utils.R"))
# source("~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/bin/utils.R")
# source("~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/bin/SLOcatoR_functions.R")
source(here::here("scRNA-seq/bin/SLOcatoR_functions.R"))


# Additional function
plot_subcluster <- function(seurat_obj, pattern) {
  p <- seurat_obj@reductions$umap@cell.embeddings %>%
    as.data.frame() %>%
    mutate(cluster = seurat_obj$annotation_level_3) %>%
    filter(str_detect(cluster, pattern)) %>%
    ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
      geom_point(size = 0.1) +
      theme_classic()
  p
}

1.3 Load data

# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 37378 features across 65461 samples within 1 assay 
## Active assay: RNA (37378 features, 0 variable features)
##  3 dimensional reductions calculated: pca, umap, harmony
DimPlot(seurat, cols = color_palette, pt.size = 0.2)

2 Subcluster

seurat <- FindSubCluster(
  seurat,
  cluster = "Follicular Th CXCL13+CBLB+",
  subcluster.name = "annotation_level_5",
  resolution = 0.1,
  graph.name = "RNA_snn"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5769
## Number of edges: 106629
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9105
## Number of communities: 36
## Elapsed time: 0 seconds
DimPlot(seurat, group.by = "annotation_level_5", cols = color_palette)

2.1 Markers

Idents(seurat) <- "annotation_level_5"
clusters_interest <- seurat$annotation_level_5 %>%
  unique() %>%
  str_subset("^Follicular Th CXCL13") %>%
  sort()
markers <- purrr::map(clusters_interest, function(x) {
  group_1 <- clusters_interest[which(clusters_interest == x)]
  group_2 <- clusters_interest[which(clusters_interest != x)]
  print(group_1)
  print(group_2)
  df <- FindMarkers(
    seurat,
    ident.1 = group_1,
    ident.2 = group_2,
    only.pos = TRUE,
    logfc.threshold = 0.5,
    verbose = TRUE
  )
  df <- df %>%
    rownames_to_column(var = "gene") %>%
    arrange(desc(avg_log2FC))
  df
})
## [1] "Follicular Th CXCL13+CBLB+_0"
## [1] "Follicular Th CXCL13+CBLB+_1" "Follicular Th CXCL13+CBLB+_2"
## [1] "Follicular Th CXCL13+CBLB+_1"
## [1] "Follicular Th CXCL13+CBLB+_0" "Follicular Th CXCL13+CBLB+_2"
## [1] "Follicular Th CXCL13+CBLB+_2"
## [1] "Follicular Th CXCL13+CBLB+_0" "Follicular Th CXCL13+CBLB+_1"
names(markers) <- clusters_interest
# path_save_markers <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/markers_Tfh_CXCL13_new.xlsx"
path_save_markers <- here::here("scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/markers_Tfh_CXCL13_new.xlsx")
openxlsx::write.xlsx(x = markers, file = path_save_markers, overwrite = TRUE)

3 Subset

clusters_to_exclude <- c("activated CD4 T", "Mitochondrial+ T cells",
                         "metabolic/poor-quality")
mask <- !(Idents(seurat) %in% clusters_to_exclude)
table(mask)
## mask
## FALSE  TRUE 
##  8667 56794
selected_cells <- colnames(seurat)[mask]
seurat <- subset(seurat, cells = selected_cells)

Let us exclude all the cells from Royal London (discussed elsewhere):

seurat <- subset(seurat, hospital != "Royal London")

4 Reprocess

hvg <- find_assay_specific_features(seurat)
seurat <- integrate_assays(
  seurat,
  assay_specific = TRUE,
  shared_hvg = hvg,
  assay_var = "assay",
  n_dim = 30
)

5 Recompute UMAP

seurat <- RunUMAP(seurat, reduction = "harmony", dims = 1:30)
DimPlot(seurat, cols = color_palette)

6 Subset Th

clusters_interest_2 <- "Th17"
th <- subset(seurat, idents = clusters_interest_2)
th <- subset(th, assay == "3P")
th <- th %>%
  FindVariableFeatures() %>%
  ScaleData() %>%
  RunPCA() %>%
  RunHarmony(group.by.vars = "donor_id", reduction = "pca", dims = 1:30) %>%
  RunUMAP(reduction = "harmony", dims = 1:30)

markers_interest <- c("IFNG", "GATA3", "IL26", "IL21", "CCL3", "IL4")
feat_plots <- purrr::map(markers_interest, function(x) {
  p <- FeaturePlot(th, features = x) +
    scale_color_viridis_c(option = "magma")
  p
})
feat_plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

DimPlot(th)

# Cluster
th <- FindNeighbors(th, dims = 1:30, reduction = "harmony")
th <- FindClusters(th, resolution = 0.65)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1091
## Number of edges: 52799
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6121
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(th)

# Markers
markers_th <- FindAllMarkers(th, only.pos = TRUE, logfc.threshold = 0.5)

7 Annotation (as of 5th Nov, 2021)

All CD4 T cells:

seurat$annotation_paper <- case_when(
  seurat$annotation_level_5 == "Naive" ~ "Naive",
  seurat$annotation_level_5 == "Central Mem PASK+" ~ "CM Pre-non-Tfh",
  seurat$annotation_level_5 == "Central Mem PASK-" ~ "CM PreTfh",
  seurat$annotation_level_5 == "Memory T cells" ~ "T-Trans-Mem",
  seurat$annotation_level_5 == "Follicular Th CXCL13+CBLB+_1" ~ "T-Eff-Mem",
  seurat$annotation_level_5 == "Th17" ~ "T-helper",
  seurat$annotation_level_5 == "Follicular Th CXCL13+CBLB+_2" ~ "Tfh T:B border",
  seurat$annotation_level_5 == "Follicular Th CXCL13+CBLB+_0" ~ "Tfh-LZ-GC",
  seurat$annotation_level_5 == "Follicular Th TOX2+" ~ "Tfh-LZ-GC",
  seurat$annotation_level_5 == "Follicular Th CXCR5+" ~ "GC-Tfh-SAP",
  seurat$annotation_level_5 == "CD200+TOX+" ~ "GC-Tfh-0X40",
  seurat$annotation_level_5 == "CD4 Non-Tfh KLRB1+ " ~ "Tfh-Mem",
  seurat$annotation_level_5 == "IL2RA+FOXP3+ Treg" ~ "Eff-Tregs",
  seurat$annotation_level_5 == "Treg IKZF2+HPGD+" ~ "non-GC-Tf-regs",
  seurat$annotation_level_5 == "naive Treg IKZF2+" ~ "GC-Tf-regs",
)
ordered_levels <- c("Naive", "CM Pre-non-Tfh", "CM PreTfh", "T-Trans-Mem",
                    "T-Eff-Mem", "T-helper", "Tfh T:B border", "Tfh-LZ-GC",
                    "GC-Tfh-SAP", "GC-Tfh-0X40", "Tfh-Mem", "Eff-Tregs", 
                    "non-GC-Tf-regs", "GC-Tf-regs")
seurat$annotation_paper <- factor(
  seurat$annotation_paper,
  levels = ordered_levels
)
Idents(seurat) <- "annotation_paper"

T helper subsets: since we found a cluster of Th22 cells, we will recluster to find it.

th <- FindSubCluster(
  th,
  cluster = "3",
  graph.name = "RNA_snn",
  subcluster.name = "Th22", 
  resolution = 0.25
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 199
## Number of edges: 6063
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7688
## Number of communities: 2
## Elapsed time: 0 seconds
th$annotation_paper <- case_when(
  th$Th22 == "0" ~ "Th2",
  th$Th22 == "1" ~ "Th0",
  th$Th22 == "2" ~ "Th17",
  th$Th22 == "3_0" ~ "Th1",
  th$Th22 == "3_1" ~ "Th22",
  th$Th22 == "4" ~ "Th1/Th17"
)
ordered_levels_th <- c("Th0", "Th1", "Th2", "Th17", "Th1/Th17", "Th22")
th$annotation_paper <- factor(
  th$annotation_paper,
  levels = ordered_levels_th
)
Idents(th) <- "annotation_paper"
DimPlot(th, cols = color_palette)

Find all markers

# All CD4 T cells
markers_all_cd4 <- FindAllMarkers(seurat, only.pos = TRUE, logfc.threshold = 0.6)
markers_all_cd4 <- markers_all_cd4 %>%
  group_by(cluster) %>%
  arrange(desc(avg_log2FC), group_by = TRUE) %>%
  ungroup()
markers_all_dfs <- purrr::map(unique(markers_all_cd4$cluster), function(x) {
  df <- markers_all_cd4[markers_all_cd4$cluster == x, ]
  df <- df[, c(7, 1:6)]
  df
})
names(markers_all_dfs) <- unique(markers_all_cd4$cluster)
markers_all_dfs <- markers_all_dfs[levels(seurat$annotation_paper)]


# Th cells
markers_th <- FindAllMarkers(th, only.pos = TRUE, logfc.threshold = 0.45)
markers_th <- markers_th %>%
  group_by(cluster) %>%
  arrange(desc(avg_log2FC), group_by = TRUE) %>%
  ungroup()
markers_th_dfs <- purrr::map(unique(markers_th$cluster), function(x) {
  df <- markers_th[markers_th$cluster == x, ]
  df <- df[, c(7, 1:6)]
  df
})
names(markers_th_dfs) <- unique(markers_th$cluster)
markers_th_dfs <- markers_th_dfs[levels(th$annotation_paper)]
names(markers_th_dfs)[names(markers_th_dfs) == "Th1/Th17"] <- "Th1_Th17"

8 Save

# Save Seurat objects
saveRDS(seurat, path_to_save)
saveRDS(th, path_to_save_th)


# Save markers
# path_save_markers <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_T_markers_annotated_level_5.xlsx"
names(markers_all_dfs)[names(markers_all_dfs) == "Tfh T:B border"] <- "Tfh T-B border"
path_save_markers <- here::here("scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_T_markers_annotated_level_5.xlsx")
openxlsx::write.xlsx(
  x = markers_all_dfs,
  file = path_save_markers,
  overwrite = TRUE
)
# path_to_save_markers_th <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/markers_Th_subsets_annotated.xlsx"
path_to_save_markers_th <- here::here("scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/markers_Th_subsets_annotated.xlsx")
openxlsx::write.xlsx(
  x = markers_th_dfs,
  file = path_to_save_markers_th,
  overwrite = TRUE
)


# Save UMAP
umap_level_5 <- DimPlot(seurat, cols = color_palette)
# path_to_save_umap1 <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_T_umap_level_5_2_annotated.png"
path_to_save_umap1 <- here::here("scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_T_umap_level_5_2_annotated.png")
ggsave(
  filename = path_to_save_umap1,
  plot = umap_level_5,
  width = 14,
  height = 12,
  units = "cm"
)
umap_level_5_th <- DimPlot(th, cols = color_palette)
# path_to_save_umap2 <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_Th_umap_annotated.png"
path_to_save_umap2 <- here::here("scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_Th_umap_annotated.png")
ggsave(
  filename = path_to_save_umap2,
  plot = umap_level_5_th,
  width = 14,
  height = 12,
  units = "cm"
)

9 Session information

sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
## 
## Matrix products: default
## BLAS:   /apps/R/4.0.1/lib64/R/lib/libRblas.so
## LAPACK: /apps/R/4.0.1/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=C                 LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1        stringr_1.4.0        dplyr_1.0.4          purrr_0.3.4          readr_1.4.0          tidyr_1.1.2          tibble_3.0.6         ggplot2_3.3.3        tidyverse_1.3.0      harmony_0.1.0        Rcpp_1.0.6           SeuratWrappers_0.3.0 SeuratObject_4.0.0   Seurat_4.0.0         BiocStyle_2.16.1    
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1         backports_1.2.1      plyr_1.8.6           igraph_1.2.6         lazyeval_0.2.2       splines_4.0.1        listenv_0.8.0        scattermore_0.7      digest_0.6.27        htmltools_0.5.1.1    fansi_0.4.2          magrittr_2.0.1       tensor_1.5           cluster_2.1.0        ROCR_1.0-11          openxlsx_4.2.4       remotes_2.1.1        globals_0.14.0       modelr_0.1.8         matrixStats_0.58.0   colorspace_2.0-0     rvest_0.3.6          ggrepel_0.9.1        haven_2.3.1          xfun_0.21            crayon_1.4.1         jsonlite_1.7.2       spatstat_1.64-1      spatstat.data_2.0-0  survival_3.2-3       zoo_1.8-8            glue_1.4.2           polyclip_1.10-0      gtable_0.3.0         leiden_0.3.7         future.apply_1.7.0   abind_1.4-5          scales_1.1.1         DBI_1.1.1            miniUI_0.1.1.1       viridisLite_0.3.0    xtable_1.8-4         reticulate_1.18      rsvd_1.0.3           htmlwidgets_1.5.3    httr_1.4.2           RColorBrewer_1.1-2   ellipsis_0.3.1       ica_1.0-2            farver_2.0.3         pkgconfig_2.0.3      sass_0.3.1           uwot_0.1.10          dbplyr_2.1.0         deldir_0.2-10        here_1.0.1          
##  [57] utf8_1.1.4           labeling_0.4.2       tidyselect_1.1.0     rlang_0.4.10         reshape2_1.4.4       later_1.1.0.1        munsell_0.5.0        cellranger_1.1.0     tools_4.0.1          cli_2.3.1            generics_0.1.0       broom_0.7.4          ggridges_0.5.3       evaluate_0.14        fastmap_1.1.0        yaml_2.2.1           goftest_1.2-2        knitr_1.31           fs_1.5.0             fitdistrplus_1.1-3   zip_2.2.0            RANN_2.6.1           pbapply_1.4-3        future_1.21.0        nlme_3.1-148         mime_0.10            xml2_1.3.2           compiler_4.0.1       rstudioapi_0.13      plotly_4.9.3         png_0.1-7            spatstat.utils_2.0-0 reprex_1.0.0         bslib_0.2.4          stringi_1.5.3        highr_0.8            ps_1.5.0             RSpectra_0.16-0      lattice_0.20-41      Matrix_1.2-18        vctrs_0.3.6          pillar_1.5.0         lifecycle_1.0.0      BiocManager_1.30.10  lmtest_0.9-38        jquerylib_0.1.3      RcppAnnoy_0.0.18     data.table_1.14.0    cowplot_1.1.1        irlba_2.3.3          httpuv_1.5.5         patchwork_1.1.1      R6_2.5.0             bookdown_0.21        promises_1.2.0.1     KernSmooth_2.23-17  
## [113] gridExtra_2.3        parallelly_1.23.0    codetools_0.2-16     MASS_7.3-51.6        assertthat_0.2.1     rprojroot_2.0.2      withr_2.4.1          sctransform_0.3.2    mgcv_1.8-31          parallel_4.0.1       hms_1.0.0            grid_4.0.1           rpart_4.1-15         rmarkdown_2.7        Rtsne_0.15           shiny_1.6.0          lubridate_1.7.9.2